//==========================================================================
// Copyright (c) 2000-2008,  Elastos, Inc.  All Rights Reserved.
//==========================================================================
/*-
 * Copyright (c) 2001, 2002, 2003
 *  Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
//#include <sys/netmisc.h>
//#include <sys/cdefs.h>
//__FBSDID("$FreeBSD: src/sys/kern/subr_mbuf.c,v 1.55 2003/08/16 19:48:52 bmilekic Exp $");
//
//#include "opt_mac.h"
//#include "opt_param.h"
//
//#include <sys/param.h>
//#include <sys/systm.h>
//#include <sys/malloc.h>
//#include <sys/mac.h>
//#include <sys/mbuf.h>
//#include <sys/lock.h>
//#include <sys/mutex.h>
//#include <sys/condvar.h>
//#include <sys/smp.h>
//#include <sys/kernel.h>
//#include <sys/sysctl.h>
//#include <sys/domain.h>
//#include <sys/protosw.h>

//#include <vm/vm.h>
//#include <vm/vm_kern.h>
//#include <vm/vm_extern.h>
//#include <vm/pmap.h>
//#include <vm/vm_map.h>
#include "mbuf_.h"
#include "misc.h"

/*
 * mb_alloc: network buffer allocator
 *
 * XXX: currently, the "low watermark" sysctl is marked read-only as its
 * effects are not completely implemented.  To be fixed soon.
 */

/*
 * Maximum number of PCPU containers. If you know what you're doing you could
 * explicitly define MBALLOC_NCPU to be exactly the number of CPUs on your
 * system during compilation, and thus prevent kernel structure bloat.
 *
 * SMP and non-SMP kernels clearly have a different number of possible CPUs,
 * but because we cannot assume a dense array of CPUs, we always allocate
 * and traverse PCPU containers up to NCPU amount and merely check for
 * CPU availability.
 */
#ifdef MBALLOC_NCPU
#define NCPU    MBALLOC_NCPU
#else
#define NCPU    MAXCPU
#endif

/*-
 * The mbuf allocator is based on Alfred Perlstein's <alfred@FreeBSD.org>
 * "memcache" proof-of-concept allocator which was itself based on
 * several well-known SMP-friendly allocators.
 *
 * The mb_alloc mbuf allocator is special when compared to other
 * general-purpose allocators.  Some things to take note of:
 *
 *   Mbufs and mbuf clusters are two different objects.  Sometimes we
 *   will allocate a single mbuf, other times a single cluster,
 *   other times both.  Further, we may sometimes wish to allocate a
 *   whole chain of mbufs with clusters.  This allocator will perform
 *   the common case of each scenario in one function call (this
 *   includes constructing or destructing the object) while only
 *   locking/unlocking the cache once, if it can get away with it.
 *   The caches consist of pure mbufs and pure clusters; that is,
 *   there are no 'zones' containing mbufs with already pre-hooked
 *   clusters.  Since we can allocate both objects atomically anyway,
 *   we don't bother fragmenting our caches for any particular 'scenarios.'
 *
 *   We allocate from separate sub-maps of kmem_map, thus imposing
 *   an ultimate upper limit on the number of allocatable clusters
 *   and mbufs and also, since the clusters all come from a
 *   virtually contiguous region, we can keep reference counters
 *   for them and "allocate" them purely by indexing into a
 *   dense refcount vector.
 *
 *   We call out to protocol drain routines (which can be hooked
 *   into us) when we're low on space.
 *
 * The mbuf allocator keeps all objects that it allocates in mb_buckets.
 * The buckets keep a number of objects (an object can be an mbuf or an
 * mbuf cluster) and facilitate moving larger sets of contiguous objects
 * from the per-CPU caches to the global cache. The buckets also have
 * the added advantage that objects, when migrated from cache to cache,
 * are migrated in chunks that keep contiguous objects together,
 * minimizing TLB pollution.
 *
 * The buckets are kept on singly-linked lists called "containers." A container
 * is protected by a mutex in order to ensure consistency.  The mutex
 * itself is allocated separately and attached to the container at boot time,
 * thus allowing for certain containers to share the same lock.  Per-CPU
 * containers for mbufs and mbuf clusters all share the same per-CPU
 * lock whereas the global cache containers for these objects share one
 * global lock.
 */
struct mb_bucket {
    SLIST_ENTRY(mb_bucket) mb_blist;
    int     mb_owner;
    int mb_numfree;
    void    *mb_free[0];
};

struct mb_container {
    SLIST_HEAD(mc_buckethd, mb_bucket) mc_bhead;
    struct  mtx *mc_lock;
    int mc_numowner;
    u_int   mc_starved;
    long    *mc_types;
    u_long  *mc_objcount;
    u_long  *mc_numbucks;
};

struct mb_gen_list {
    struct  mb_container mb_cont;
    struct  cv mgl_mstarved;
};

struct mb_pcpu_list {
    struct  mb_container mb_cont;
};

/*
 * Boot-time configurable object counts that will determine the maximum
 * number of permitted objects in the mbuf and mcluster cases.  In the
 * ext counter (nmbcnt) case, it's just an indicator serving to scale
 * kmem_map size properly - in other words, we may be allowed to allocate
 * more than nmbcnt counters, whereas we will never be allowed to allocate
 * more than nmbufs mbufs or nmbclusters mclusters.
 * As for nsfbufs, it is used to indicate how many sendfile(2) buffers will be
 * allocatable by the sfbuf allocator (found in uipc_syscalls.c)
 */
#ifndef NMBCLUSTERS
#define NMBCLUSTERS (1024 + MAXUSERS * 64)
//#define NMBCLUSTERS (10 + MAXUSERS * 64)
#endif
#ifndef NMBUFS
#define NMBUFS      (nmbclusters * 2)
#endif
#ifndef NSFBUFS
#define NSFBUFS     (512 + MAXUSERS * 16)
//#define NSFBUFS     (5 + MAXUSERS * 16)
#endif
#ifndef NMBCNTS
#define NMBCNTS     (nmbclusters + nsfbufs)
#endif
int nmbufs;
int nmbclusters;
int nmbcnt;
int nsfbufs;

/*
 * Bucket sizes, in bytes.  Each bucket holds this many bytes' worth of
 * mbufs or clusters.  Please keep these a power of 2.
 */
#define MBUF_BUCK_SZ    (PAGE_SIZE * 2)
#define CLUST_BUCK_SZ   (PAGE_SIZE * 4)
//#define MBUF_BUCK_SZ    (512)
//#define CLUST_BUCK_SZ   (1024)

/*
 * Perform sanity checks of tunables declared above.
 */
void
tunable_mbinit(void *dummy)
{
    /*
     * This has to be done before VM init.
     */
    nmbclusters = NMBCLUSTERS;
    TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
    nmbufs = NMBUFS;
    TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
    nsfbufs = NSFBUFS;
    TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
    nmbcnt = NMBCNTS;
    TUNABLE_INT_FETCH("kern.ipc.nmbcnt", &nmbcnt);
    /* Sanity checks */
    if (nmbufs < nmbclusters * 2)
        nmbufs = nmbclusters * 2;
    if (nmbcnt < nmbclusters + nsfbufs)
        nmbcnt = nmbclusters + nsfbufs;
}
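
/*
 * Illustrative only: on a stock FreeBSD system these values would normally
 * be overridden from /boot/loader.conf before boot, for example:
 *
 *   kern.ipc.nmbclusters="32768"
 *   kern.ipc.nmbufs="65536"
 *
 * Whether TUNABLE_INT_FETCH() is backed by a real tunable store in this
 * port is an assumption; if it is a stub, the compile-time defaults above
 * (NMBCLUSTERS, NMBUFS, NSFBUFS, NMBCNTS) are what take effect.
 */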
//SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL);

/*
 * The freelist structures and mutex locks.  The number statically declared
 * here depends on the number of CPUs.
 *
 * We set up in such a way that all the objects (mbufs, clusters)
 * share the same mutex lock.  It has been established that we do not benefit
 * from different locks for different objects, so we use the same lock,
 * regardless of object type.  This also allows us to do optimised
 * multi-object allocations without dropping the lock in between.
 */
struct mb_lstmngr {
    struct mb_gen_list *ml_genlist;
    struct mb_pcpu_list *ml_cntlst[NCPU];
    struct mb_bucket **ml_btable;
    vm_offset_t ml_mapcur;
    vm_offset_t ml_mapbase;
    vm_offset_t ml_maptop;
    int     ml_mapfull;
    u_int       ml_objsize;
    u_int       ml_objbucks;
    u_int       *ml_wmhigh;
    u_int       *ml_wmlow;
};
static struct mb_lstmngr mb_list_mbuf, mb_list_clust;
static struct mtx mbuf_gen, mbuf_pcpu[NCPU];
static u_int *cl_refcntmap;

/*
 * Local macros for internal allocator structure manipulations.
 */
#ifdef SMP
#define MB_GET_PCPU_LIST(mb_lst)    (mb_lst)->ml_cntlst[PCPU_GET(cpuid)]
#else
#define MB_GET_PCPU_LIST(mb_lst)    (mb_lst)->ml_cntlst[0]
#endif

#define MB_GET_GEN_LIST(mb_lst)     (mb_lst)->ml_genlist

#define MB_LOCK_CONT(mb_cnt)        mtx_lock((mb_cnt)->mb_cont.mc_lock)

#define MB_UNLOCK_CONT(mb_cnt)      mtx_unlock((mb_cnt)->mb_cont.mc_lock)

#define MB_GET_PCPU_LIST_NUM(mb_lst, num)               \
    (mb_lst)->ml_cntlst[(num)]

#define MB_BUCKET_INDX(mb_obj, mb_lst)                  \
    (int)(((caddr_t)(mb_obj) - (caddr_t)(mb_lst)->ml_mapbase) /     \
    ((mb_lst)->ml_objbucks * (mb_lst)->ml_objsize))
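
/*
 * Worked example for MB_BUCKET_INDX() (the numbers are illustrative
 * assumptions): with MSIZE = 256 and MBUF_BUCK_SZ = PAGE_SIZE * 2 = 8192,
 * an mbuf bucket spans 8192 / 256 = 32 objects.  An object lying 20000
 * bytes past ml_mapbase therefore maps to bucket index 20000 / 8192 = 2
 * in ml_btable.
 */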

#define MB_GET_OBJECT(mb_objp, mb_bckt, mb_lst)             \
{                                   \
    struct mc_buckethd *_mchd = &((mb_lst)->mb_cont.mc_bhead);  \
                                    \
    (mb_bckt)->mb_numfree--;                    \
    (mb_objp) = (mb_bckt)->mb_free[((mb_bckt)->mb_numfree)];    \
    (*((mb_lst)->mb_cont.mc_objcount))--;               \
    if ((mb_bckt)->mb_numfree == 0) {               \
        SLIST_REMOVE_HEAD(_mchd, mb_blist);         \
        SLIST_NEXT((mb_bckt), mb_blist) = NULL;         \
        (mb_bckt)->mb_owner |= MB_BUCKET_FREE;          \
    }                               \
}

#define MB_PUT_OBJECT(mb_objp, mb_bckt, mb_lst)             \
    (mb_bckt)->mb_free[((mb_bckt)->mb_numfree)] = (mb_objp);    \
    (mb_bckt)->mb_numfree++;                    \
    (*((mb_lst)->mb_cont.mc_objcount))++;

#define MB_MBTYPES_INC(mb_cnt, mb_type, mb_num)             \
    if ((mb_type) != MT_NOTMBUF)                    \
        (*((mb_cnt)->mb_cont.mc_types + (mb_type))) += (mb_num)

#define MB_MBTYPES_DEC(mb_cnt, mb_type, mb_num)             \
    if ((mb_type) != MT_NOTMBUF)                    \
        (*((mb_cnt)->mb_cont.mc_types + (mb_type))) -= (mb_num)

/*
 * Ownership of buckets/containers is represented by integers.  The PCPU
 * lists range from 0 to NCPU-1.  We need a free numerical id for the general
 * list (we use NCPU).  We also need a non-conflicting free bit to indicate
 * that the bucket is free and removed from a container, while not losing
 * the bucket's originating container id.  We use the highest bit
 * for the free marker.
 */
#define MB_GENLIST_OWNER    (NCPU)
#define MB_BUCKET_FREE      (1 << (sizeof(int) * 8 - 1))
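
/*
 * Example of the encoding (assuming a 32-bit int and, say, NCPU = 4):
 * owners 0..3 name the PCPU containers, owner 4 (MB_GENLIST_OWNER) names
 * the general list, and MB_BUCKET_FREE is 0x80000000.  An mb_owner value
 * of 0x80000002 therefore means "bucket currently detached/free, but it
 * originated from CPU 2's container".
 */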

/* Statistics structures for allocator (per-CPU and general). */
static struct mbpstat mb_statpcpu[NCPU + 1];
struct mbstat mbstat;

/* Sleep time for wait code (in ticks). */
static int mbuf_wait = 64;

static u_int mbuf_hiwm = 512;   /* High wm on  # of mbufs per cache */
static u_int mbuf_lowm = 128;   /* Low wm on # of mbufs per cache */
static u_int clust_hiwm = 128;  /* High wm on # of clusters per cache */
static u_int clust_lowm = 16;   /* Low wm on # of clusters per cache */

/*
 * Objects exported by sysctl(8).
 */
//SYSCTL_DECL(_kern_ipc);
//SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RD, &nmbclusters, 0,
//    "Maximum number of mbuf clusters available");
//SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
//    "Maximum number of mbufs available");
//SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0,
//    "Number used to scale kmem_map to ensure sufficient space for counters");
//SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RD, &nsfbufs, 0,
//    "Maximum number of sendfile(2) sf_bufs available");
//SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW, &mbuf_wait, 0,
//    "Sleep time of mbuf subsystem wait allocations during exhaustion");
//SYSCTL_UINT(_kern_ipc, OID_AUTO, mbuf_hiwm, CTLFLAG_RW, &mbuf_hiwm, 0,
//    "Upper limit of number of mbufs allowed in each cache");
//SYSCTL_UINT(_kern_ipc, OID_AUTO, mbuf_lowm, CTLFLAG_RD, &mbuf_lowm, 0,
//    "Lower limit of number of mbufs allowed in each cache");
//SYSCTL_UINT(_kern_ipc, OID_AUTO, clust_hiwm, CTLFLAG_RW, &clust_hiwm, 0,
//    "Upper limit of number of mbuf clusters allowed in each cache");
//SYSCTL_UINT(_kern_ipc, OID_AUTO, clust_lowm, CTLFLAG_RD, &clust_lowm, 0,
//    "Lower limit of number of mbuf clusters allowed in each cache");
//SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
//    "Mbuf general information and statistics");
//SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mb_statpcpu, CTLFLAG_RD, mb_statpcpu,
//    sizeof(mb_statpcpu), "S,", "Mbuf allocator per CPU statistics");

/*
 * Prototypes of local allocator routines.
 */
static void     *mb_alloc_wait(struct mb_lstmngr *, short);
static struct mb_bucket *mb_pop_cont(struct mb_lstmngr *, int,
                struct mb_pcpu_list *);
static void      mb_reclaim(void);
/*static*/ void      mbuf_init(void *);
int ticks;
int hz = 100;
/*
 * Initial allocation numbers.  Each parameter represents the number of buckets
 * of each object that will be placed initially in each PCPU container for
 * said object.
 */
#define NMB_MBUF_INIT   2
#define NMB_CLUST_INIT  8

/*
 * Internal flags that allow for cache locks to remain "persistent" across
 * allocation and free calls.  They may be used in combination.
 */
#define MBP_PERSIST 0x1 /* Return with lock still held. */
#define MBP_PERSISTENT  0x2 /* Cache lock is already held coming in. */
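
/*
 * A minimal sketch of how the two flags cooperate (this mirrors what
 * m_getcl() below actually does): the first allocation asks for the
 * per-CPU cache lock to be kept, the second announces that it enters with
 * that same lock already held, so both objects cost one lock round trip.
 *
 *   int cchnum;
 *   void *m  = mb_alloc(&mb_list_mbuf,  how, type,       MBP_PERSIST,    &cchnum);
 *   void *cl = mb_alloc(&mb_list_clust, how, MT_NOTMBUF, MBP_PERSISTENT, &cchnum);
 */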

/*
 * Initialize the mbuf subsystem.
 *
 * We sub-divide the kmem_map into several submaps; this way, we don't have
 * to worry about artificially limiting the number of mbuf or mbuf cluster
 * allocations, due to fear of one type of allocation "stealing" address
 * space initially reserved for another.
 *
 * Set up both the general containers and all the PCPU containers.  Populate
 * the PCPU containers with initial numbers.
 */
//MALLOC_DEFINE(M_MBUF, "mbufmgr", "mbuf subsystem management structures");
//SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL)
//static

void mbuf_init(void *dummy)
{
    struct mb_pcpu_list *pcpu_cnt;
    vm_size_t mb_map_size;
    int i, j;
    void *p;
    /*
     * Set up all the submaps, for each type of object that we deal
     * with in this allocator.
     */

    mb_map_size = (vm_size_t)(nmbufs * MSIZE);
    mb_map_size = rounddown(mb_map_size, MBUF_BUCK_SZ);

//mb_map_size = 2*1024;//wenli

    MALLOC(mb_list_mbuf.ml_btable, struct mb_bucket **, (unsigned long)mb_map_size /
        MBUF_BUCK_SZ * sizeof(struct mb_bucket *), M_MBUF, M_NOWAIT);
    if (mb_list_mbuf.ml_btable == NULL)
        goto bad;
    p = kmem_suballoc(kmem_map, &(mb_list_mbuf.ml_mapbase),
        &(mb_list_mbuf.ml_maptop), mb_map_size);
    if (p == NULL)
        goto bad;
    mb_list_mbuf.ml_mapcur = mb_list_mbuf.ml_mapbase;
//  mb_list_mbuf.ml_map->system_map = 1;
    mb_list_mbuf.ml_mapfull = 0;
    mb_list_mbuf.ml_objsize = MSIZE;
    mb_list_mbuf.ml_objbucks = MBUF_BUCK_SZ / mb_list_mbuf.ml_objsize;
    mb_list_mbuf.ml_wmhigh = &mbuf_hiwm;
    mb_list_mbuf.ml_wmlow = &mbuf_lowm;

    mb_map_size = (vm_size_t)(nmbclusters * MCLBYTES);
    mb_map_size = rounddown(mb_map_size, CLUST_BUCK_SZ);


//mb_map_size = 2*1024;//wenli

    MALLOC(mb_list_clust.ml_btable, struct mb_bucket **, (unsigned long)mb_map_size /
        CLUST_BUCK_SZ * sizeof(struct mb_bucket *), M_MBUF, M_NOWAIT);
    if (mb_list_clust.ml_btable == NULL)
        goto bad;
    p = kmem_suballoc(kmem_map,
        &(mb_list_clust.ml_mapbase), &(mb_list_clust.ml_maptop),
        mb_map_size);
    if (p == NULL)
        goto bad;
    mb_list_clust.ml_mapcur = mb_list_clust.ml_mapbase;
//  mb_list_clust.ml_map->system_map = 1;
    mb_list_clust.ml_mapfull = 0;
    mb_list_clust.ml_objsize = MCLBYTES;
    mb_list_clust.ml_objbucks = CLUST_BUCK_SZ / mb_list_clust.ml_objsize;
    mb_list_clust.ml_wmhigh = &clust_hiwm;
    mb_list_clust.ml_wmlow = &clust_lowm;

    /*
     * Allocate required general (global) containers for each object type.
     */
    MALLOC(mb_list_mbuf.ml_genlist, struct mb_gen_list *, sizeof(struct mb_gen_list), M_MBUF,
        M_NOWAIT);
    MALLOC(mb_list_clust.ml_genlist, struct mb_gen_list *, sizeof(struct mb_gen_list), M_MBUF,
        M_NOWAIT);
    if ((mb_list_mbuf.ml_genlist == NULL) ||
        (mb_list_clust.ml_genlist == NULL))
        goto bad;

    /*
     * Initialize condition variables and general container mutex locks.
     */
    mtx_init(&mbuf_gen, "mbuf subsystem general lists lock", NULL, 0);
    cv_init(&(mb_list_mbuf.ml_genlist->mgl_mstarved), "mbuf pool starved");
    cv_init(&(mb_list_clust.ml_genlist->mgl_mstarved),
        "mcluster pool starved");
    mb_list_mbuf.ml_genlist->mb_cont.mc_lock =
        mb_list_clust.ml_genlist->mb_cont.mc_lock = &mbuf_gen;

    /*
     * Set up the general containers for each object.
     */
    mb_list_mbuf.ml_genlist->mb_cont.mc_numowner =
        mb_list_clust.ml_genlist->mb_cont.mc_numowner = MB_GENLIST_OWNER;
    mb_list_mbuf.ml_genlist->mb_cont.mc_starved =
        mb_list_clust.ml_genlist->mb_cont.mc_starved = 0;
    mb_list_mbuf.ml_genlist->mb_cont.mc_objcount =
        &(mb_statpcpu[MB_GENLIST_OWNER].mb_mbfree);
    mb_list_clust.ml_genlist->mb_cont.mc_objcount =
        &(mb_statpcpu[MB_GENLIST_OWNER].mb_clfree);
    mb_list_mbuf.ml_genlist->mb_cont.mc_numbucks =
        &(mb_statpcpu[MB_GENLIST_OWNER].mb_mbbucks);
    mb_list_clust.ml_genlist->mb_cont.mc_numbucks =
        &(mb_statpcpu[MB_GENLIST_OWNER].mb_clbucks);
    mb_list_mbuf.ml_genlist->mb_cont.mc_types =
        &(mb_statpcpu[MB_GENLIST_OWNER].mb_mbtypes[0]);
    mb_list_clust.ml_genlist->mb_cont.mc_types = NULL;
    SLIST_INIT(&(mb_list_mbuf.ml_genlist->mb_cont.mc_bhead));
    SLIST_INIT(&(mb_list_clust.ml_genlist->mb_cont.mc_bhead));

    /*
     * Allocate all the required counters for clusters.  This makes
     * cluster allocations/deallocations much faster.
     */
//    nmbclusters = 1000;

    MALLOC(cl_refcntmap, u_int *, nmbclusters * sizeof(u_int), M_MBUF, M_NOWAIT);
    if (cl_refcntmap == NULL)
        goto bad;

    /*
     * Initialize general mbuf statistics.
     */
    mbstat.m_msize =  mb_list_mbuf.ml_objsize;
    mbstat.m_mclbytes = mb_list_clust.ml_objsize;
    mbstat.m_minclsize = MINCLSIZE;
    mbstat.m_mlen = MLEN;
    mbstat.m_mhlen = MHLEN;
    mbstat.m_numtypes = MT_NTYPES;
    mbstat.m_mbperbuck = mb_list_mbuf.ml_objbucks;
    mbstat.m_clperbuck = mb_list_clust.ml_objbucks;

    /*
     * Allocate and initialize PCPU containers.
     */
    for (i = 0; i < NCPU; i++) {
//      if (CPU_ABSENT(i)) {
//          mb_statpcpu[i].mb_active = 0;
//          continue;
//      }

        MALLOC(mb_list_mbuf.ml_cntlst[i], struct mb_pcpu_list *, sizeof(struct mb_pcpu_list),
            M_MBUF, M_NOWAIT);
        MALLOC(mb_list_clust.ml_cntlst[i], struct mb_pcpu_list *, sizeof(struct mb_pcpu_list),
            M_MBUF, M_NOWAIT);
        if ((mb_list_mbuf.ml_cntlst[i] == NULL) ||
            (mb_list_clust.ml_cntlst[i] == NULL))
            goto bad;

        mtx_init(&mbuf_pcpu[i], "mbuf PCPU list lock", NULL, 0);
        mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_lock =
            mb_list_clust.ml_cntlst[i]->mb_cont.mc_lock = &mbuf_pcpu[i];

        mb_statpcpu[i].mb_active = 1;
        mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_numowner =
            mb_list_clust.ml_cntlst[i]->mb_cont.mc_numowner = i;
        mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_starved =
            mb_list_clust.ml_cntlst[i]->mb_cont.mc_starved = 0;
        mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_objcount =
            &(mb_statpcpu[i].mb_mbfree);
        mb_list_clust.ml_cntlst[i]->mb_cont.mc_objcount =
            &(mb_statpcpu[i].mb_clfree);
        mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_numbucks =
            &(mb_statpcpu[i].mb_mbbucks);
        mb_list_clust.ml_cntlst[i]->mb_cont.mc_numbucks =
            &(mb_statpcpu[i].mb_clbucks);
        mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_types =
            &(mb_statpcpu[i].mb_mbtypes[0]);
        mb_list_clust.ml_cntlst[i]->mb_cont.mc_types = NULL;

        SLIST_INIT(&(mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_bhead));
        SLIST_INIT(&(mb_list_clust.ml_cntlst[i]->mb_cont.mc_bhead));

        /*
         * Perform initial allocations.
         */
        pcpu_cnt = MB_GET_PCPU_LIST_NUM(&mb_list_mbuf, i);
        MB_LOCK_CONT(pcpu_cnt);
        for (j = 0; j < NMB_MBUF_INIT; j++) {
            if (mb_pop_cont(&mb_list_mbuf, M_DONTWAIT, pcpu_cnt)
                == NULL)
                goto bad;
        }
        MB_UNLOCK_CONT(pcpu_cnt);

        pcpu_cnt = MB_GET_PCPU_LIST_NUM(&mb_list_clust, i);
        MB_LOCK_CONT(pcpu_cnt);
        for (j = 0; j < NMB_CLUST_INIT; j++) {
            if (mb_pop_cont(&mb_list_clust, M_DONTWAIT, pcpu_cnt)
                == NULL)
                goto bad;
        }
        MB_UNLOCK_CONT(pcpu_cnt);
    }

    return;
bad:
    kprintf("mbuf_init(): failed to initialize mbuf subsystem!");
}

/*
 * Populate a given mbuf PCPU container with a bucket full of fresh new
 * buffers.  Return a pointer to the new bucket (already in the container if
 * successful), or return NULL on failure.
 *
 * LOCKING NOTES:
 * PCPU container lock must be held when this is called.
 * The lock is dropped here so that we can cleanly call the underlying VM
 * code.  If we fail, we return with no locks held. If we succeed (i.e., return
 * non-NULL), we return with the PCPU lock held, ready for allocation from
 * the returned bucket.
 */
static struct mb_bucket *
mb_pop_cont(struct mb_lstmngr *mb_list, int how, struct mb_pcpu_list *cnt_lst)
{
    struct mb_bucket *bucket;
    caddr_t p;
    int i;

    MB_UNLOCK_CONT(cnt_lst);
    /*
     * If our object's (finite) map is starved now (i.e., no more address
     * space), bail out now.
     */
    if (mb_list->ml_mapfull)
        return (NULL);

    MALLOC(bucket, struct mb_bucket *, sizeof(struct mb_bucket) +
        mb_list->ml_objbucks * sizeof(void *), M_MBUF, MBTOM(how));
    if (bucket == NULL)
        return (NULL);

    p = (caddr_t)mb_list->ml_mapcur;
//  p = (caddr_t)kmem_malloc(mb_list->ml_map, mb_list->ml_objsize *
//      mb_list->ml_objbucks, MBTOM(how));
//  if (p == NULL) {
//      FREE(bucket, M_MBUF);
//      if (how == M_TRYWAIT)
//          mb_list->ml_mapfull = 1;
//      return (NULL);
//  }

    bucket->mb_numfree = 0;
    mb_list->ml_btable[MB_BUCKET_INDX(p, mb_list)] = bucket;
    for (i = 0; i < mb_list->ml_objbucks; i++) {
        bucket->mb_free[i] = p;
        bucket->mb_numfree++;
        p += mb_list->ml_objsize;
    }
    mb_list->ml_mapcur = (vm_offset_t)p;
    MB_LOCK_CONT(cnt_lst);
    bucket->mb_owner = cnt_lst->mb_cont.mc_numowner;
    SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead), bucket, mb_blist);
    (*(cnt_lst->mb_cont.mc_numbucks))++;
    *(cnt_lst->mb_cont.mc_objcount) += bucket->mb_numfree;

    return (bucket);
}

/*
 * Allocate a network buffer.
 * The general case is very easy.  Complications only arise if our PCPU
 * container is empty.  Things get worse if the PCPU container is empty,
 * the general container is empty, and we've run out of address space
 * in our map; then we try to block if we're willing to (M_TRYWAIT).
 */
static
void *
mb_alloc(struct mb_lstmngr *mb_list, int how, short type, short persist,
     int *pers_list)
{
    static int last_report;
    struct mb_pcpu_list *cnt_lst;
    struct mb_bucket *bucket;
    void *m;

#ifdef INVARIANTS
    int flags;

    flags = how & (M_WAITOK | M_NOWAIT | M_DONTWAIT | M_TRYWAIT);
    if (flags != M_DONTWAIT && flags != M_TRYWAIT) {
        static  struct timeval lasterr;
        static  int curerr;
        if (ppsratecheck(&lasterr, &curerr, 1)) {
            printf("Bad mbuf alloc flags: %x\n", flags);
            backtrace();
            how = M_TRYWAIT;
        }
    }
#endif

    m = NULL;
    if ((persist & MBP_PERSISTENT) != 0) {
        /*
         * If we're a "persistent" call, then the per-CPU #(pers_list)
         * cache lock is already held, and we just need to refer to
         * the correct cache descriptor.
         */
        cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, *pers_list);
    } else {
        cnt_lst = MB_GET_PCPU_LIST(mb_list);
        MB_LOCK_CONT(cnt_lst);
    }

    if ((bucket = SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead))) != NULL) {
        /*
         * This is the easy allocation case. We just grab an object
         * from a bucket in the PCPU container. At worst, we
         * have just emptied the bucket and so we remove it
         * from the container.
         */
                     kprintf("\nbefore mb_alloc_wait %08x\n", (unsigned int)m);
        MB_GET_OBJECT(m, bucket, cnt_lst);
                     kprintf("\nbefore mb_alloc_wait %08x\n", (unsigned int)m);
        MB_MBTYPES_INC(cnt_lst, type, 1);

        /* If asked to persist, do not drop the lock. */
        if ((persist & MBP_PERSIST) == 0)
            MB_UNLOCK_CONT(cnt_lst);
        else
            *pers_list = cnt_lst->mb_cont.mc_numowner;
    } else {
        struct mb_gen_list *gen_list;

        /*
         * This is the less-common more difficult case. We must
         * first verify if the general list has anything for us
         * and if that also fails, we must allocate a page from
         * the map and create a new bucket to place in our PCPU
         * container (already locked). If the map is starved then
         * we're really in for trouble, as we have to wait on
         * the general container's condition variable.
         */
        gen_list = MB_GET_GEN_LIST(mb_list);
        MB_LOCK_CONT(gen_list);

        if ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead)))
            != NULL) {
            /*
             * Give ownership of the bucket to our CPU's
             * container, but only actually put the bucket
             * in the container if it doesn't become free
             * upon removing an mbuf from it.
             */
            SLIST_REMOVE_HEAD(&(gen_list->mb_cont.mc_bhead),
                mb_blist);
            bucket->mb_owner = cnt_lst->mb_cont.mc_numowner;
            (*(gen_list->mb_cont.mc_numbucks))--;
            (*(cnt_lst->mb_cont.mc_numbucks))++;
            *(gen_list->mb_cont.mc_objcount) -= bucket->mb_numfree;
            bucket->mb_numfree--;
            m = bucket->mb_free[(bucket->mb_numfree)];
            if (bucket->mb_numfree == 0) {
                SLIST_NEXT(bucket, mb_blist) = NULL;
                bucket->mb_owner |= MB_BUCKET_FREE;
            } else {
                SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead),
                     bucket, mb_blist);
                *(cnt_lst->mb_cont.mc_objcount) +=
                    bucket->mb_numfree;
            }
            MB_UNLOCK_CONT(gen_list);
            MB_MBTYPES_INC(cnt_lst, type, 1);

            /* If asked to persist, do not drop the lock. */
            if ((persist & MBP_PERSIST) == 0)
                MB_UNLOCK_CONT(cnt_lst);
            else
                *pers_list = cnt_lst->mb_cont.mc_numowner;
        } else {
            /*
             * We'll have to allocate a new page.
             */
            MB_UNLOCK_CONT(gen_list);
            bucket = mb_pop_cont(mb_list, how, cnt_lst);
            if (bucket != NULL) {
                MB_GET_OBJECT(m, bucket, cnt_lst);
                MB_MBTYPES_INC(cnt_lst, type, 1);

                /* If asked to persist, do not drop the lock. */
                if ((persist & MBP_PERSIST) == 0)
                    MB_UNLOCK_CONT(cnt_lst);
                else
                    *pers_list=cnt_lst->mb_cont.mc_numowner;
            } else {
                if (how == M_TRYWAIT) {
                    /*
                     * Absolute worst-case scenario.
                     * We block if we're willing to, but
                     * only after trying to steal from
                     * other lists.
                     */
                    m = mb_alloc_wait(mb_list, type);
                } else {
                    /* XXX: No consistency. */
                    mbstat.m_drops++;

                    if (ticks < last_report ||
                       (ticks - last_report) >= hz) {
                        last_report = ticks;
                        kprintf(
"All mbufs or mbuf clusters exhausted, please see tuning(7).\n");
                    }

                }
                if (m != NULL && (persist & MBP_PERSIST) != 0) {
                    cnt_lst = MB_GET_PCPU_LIST(mb_list);
                    MB_LOCK_CONT(cnt_lst);
                    *pers_list=cnt_lst->mb_cont.mc_numowner;
                }
            }
        }
    }
//kprintf("\nkernel  mb_alloc: %08x\n",(char*)m);
    return (m);
}

/*
 * This is the worst-case scenario called only if we're allocating with
 * M_TRYWAIT.  We first drain all the protocols, then try to find an mbuf
 * by looking in every PCPU container.  If we're still unsuccessful, we
 * try the general container one last time and possibly block on our
 * starved cv.
 */
static void *
mb_alloc_wait(struct mb_lstmngr *mb_list, short type)
{
    struct mb_pcpu_list *cnt_lst;
    struct mb_gen_list *gen_list;
    struct mb_bucket *bucket;
    void *m;
    int i, cv_ret;

    /*
     * Try to reclaim mbuf-related objects (mbufs, clusters).
     */
    mb_reclaim();

    /*
     * Cycle all the PCPU containers. Increment starved counts if found
     * empty.
     */
    for (i = 0; i < NCPU; i++) {
//      if (CPU_ABSENT(i))
//          continue;
        cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, i);
        MB_LOCK_CONT(cnt_lst);

        /*
         * If container is non-empty, get a single object from it.
         * If empty, increment starved count.
         */
        if ((bucket = SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead))) !=
            NULL) {
            MB_GET_OBJECT(m, bucket, cnt_lst);
            MB_MBTYPES_INC(cnt_lst, type, 1);
            MB_UNLOCK_CONT(cnt_lst);
            mbstat.m_wait++;    /* XXX: No consistency. */
            return (m);
        } else
            cnt_lst->mb_cont.mc_starved++;

        MB_UNLOCK_CONT(cnt_lst);
    }

    /*
     * We're still here, so that means it's time to get the general
     * container lock, check it one more time (now that mb_reclaim()
     * has been called) and if we still get nothing, block on the cv.
     */
    gen_list = MB_GET_GEN_LIST(mb_list);
    MB_LOCK_CONT(gen_list);
    if ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead))) != NULL) {
        MB_GET_OBJECT(m, bucket, gen_list);
        MB_MBTYPES_INC(gen_list, type, 1);
        MB_UNLOCK_CONT(gen_list);
        mbstat.m_wait++;    /* XXX: No consistency. */
        return (m);
    }

    gen_list->mb_cont.mc_starved++;
    cv_ret = cv_timedwait(&(gen_list->mgl_mstarved),
        gen_list->mb_cont.mc_lock, mbuf_wait);
    gen_list->mb_cont.mc_starved--;

    if ((cv_ret == 0) &&
        ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead))) != NULL)) {
        MB_GET_OBJECT(m, bucket, gen_list);
        MB_MBTYPES_INC(gen_list, type, 1);
        mbstat.m_wait++;    /* XXX: No consistency. */
    } else {
        mbstat.m_drops++;   /* XXX: No consistency. */
        m = NULL;
    }

    MB_UNLOCK_CONT(gen_list);

    return (m);
}

/*-
 * Free an object to its rightful container.
 * In the very general case, this operation is really very easy.
 * Complications arise primarily if:
 *  (a) We've hit the high limit on number of free objects allowed in
 *      our PCPU container.
 *  (b) We're in a critical situation where our container has been
 *      marked 'starved' and we need to issue wakeups on the starved
 *      condition variable.
 *  (c) Minor (odd) cases: our bucket has migrated while we were
 *      waiting for the lock; our bucket is in the general container;
 *      our bucket is empty.
 */
static
void
mb_free(struct mb_lstmngr *mb_list, void *m, short type, short persist,
    int *pers_list)
{
    struct mb_pcpu_list *cnt_lst;
    struct mb_gen_list *gen_list;
    struct mb_bucket *bucket;
    u_int owner;

    bucket = mb_list->ml_btable[MB_BUCKET_INDX(m, mb_list)];

    /*
     * Make sure that if after we lock the bucket's present container the
     * bucket has migrated, that we drop the lock and get the new one.
     */
retry_lock:
    owner = bucket->mb_owner & ~MB_BUCKET_FREE;
    switch (owner) {
    case MB_GENLIST_OWNER:
        gen_list = MB_GET_GEN_LIST(mb_list);
        if (((persist & MBP_PERSISTENT) != 0) && (*pers_list >= 0)) {
            if (*pers_list != MB_GENLIST_OWNER) {
                cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list,
                    *pers_list);
                MB_UNLOCK_CONT(cnt_lst);
                MB_LOCK_CONT(gen_list);
            }
        } else {
            MB_LOCK_CONT(gen_list);
        }
        if (owner != (bucket->mb_owner & ~MB_BUCKET_FREE)) {
            MB_UNLOCK_CONT(gen_list);
            *pers_list = -1;
            goto retry_lock;
        }

        /*
         * If we're intended for the general container, this is
         * real easy: no migrating required. The only `bogon'
         * is that we're now contending with all the threads
         * dealing with the general list, but this is expected.
         */
        MB_PUT_OBJECT(m, bucket, gen_list);
        MB_MBTYPES_DEC(gen_list, type, 1);
        if (bucket->mb_owner & MB_BUCKET_FREE) {
            SLIST_INSERT_HEAD(&(gen_list->mb_cont.mc_bhead),
                bucket, mb_blist);
            bucket->mb_owner = MB_GENLIST_OWNER;
        }
        if (gen_list->mb_cont.mc_starved > 0)
            cv_signal(&(gen_list->mgl_mstarved));
        if ((persist & MBP_PERSIST) == 0)
            MB_UNLOCK_CONT(gen_list);
        else
            *pers_list = MB_GENLIST_OWNER;
        break;

    default:
        cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, owner);
        if (((persist & MBP_PERSISTENT) != 0) && (*pers_list >= 0)) {
            if (*pers_list == MB_GENLIST_OWNER) {
                gen_list = MB_GET_GEN_LIST(mb_list);
                MB_UNLOCK_CONT(gen_list);
                MB_LOCK_CONT(cnt_lst);
            } else {
                cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list,
                    *pers_list);
                owner = *pers_list;
            }
        } else {
            MB_LOCK_CONT(cnt_lst);
        }
        if (owner != (bucket->mb_owner & ~MB_BUCKET_FREE)) {
            MB_UNLOCK_CONT(cnt_lst);
            *pers_list = -1;
            goto retry_lock;
        }

        MB_PUT_OBJECT(m, bucket, cnt_lst);
        MB_MBTYPES_DEC(cnt_lst, type, 1);
        if ((*(cnt_lst->mb_cont.mc_objcount) > *(mb_list->ml_wmhigh)) ||
            (cnt_lst->mb_cont.mc_starved > 0)) {
            /*
             * We've hit the high limit of allowed numbers of mbufs
             * on this PCPU list or we've been flagged that we need
             * to transfer a bucket over to the general cache.
             * We must now migrate a bucket over to the general
             * container.
             */
            gen_list = MB_GET_GEN_LIST(mb_list);
            MB_LOCK_CONT(gen_list);
            if ((bucket->mb_owner & MB_BUCKET_FREE) == 0) {
                bucket =
                    SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead));
                SLIST_REMOVE_HEAD(&(cnt_lst->mb_cont.mc_bhead),
                    mb_blist);
            }
            SLIST_INSERT_HEAD(&(gen_list->mb_cont.mc_bhead),
                bucket, mb_blist);
            bucket->mb_owner = MB_GENLIST_OWNER;
            *(cnt_lst->mb_cont.mc_objcount) -= bucket->mb_numfree;
            *(gen_list->mb_cont.mc_objcount) += bucket->mb_numfree;
            (*(cnt_lst->mb_cont.mc_numbucks))--;
            (*(gen_list->mb_cont.mc_numbucks))++;

            /*
             * While we're at it, transfer some of the mbtypes
             * "count load" onto the general list's mbtypes
             * array, seeing as how we're moving the bucket
             * there now, meaning that the freeing of objects
             * there will now decrement the _general list's_
             * mbtypes counters, and no longer our PCPU list's
             * mbtypes counters. We do this for the type presently
             * being freed in an effort to keep the mbtypes
             * counters approximately balanced across all lists.
             */
            MB_MBTYPES_DEC(cnt_lst, type,
                mb_list->ml_objbucks - bucket->mb_numfree);
            MB_MBTYPES_INC(gen_list, type,
                mb_list->ml_objbucks - bucket->mb_numfree);

            if (cnt_lst->mb_cont.mc_starved > 0) {
                /*
                 * Determine whether or not to keep
                 * transferring buckets to the general list
                 * or whether we've transferred enough already.
                 * The thread that is blocked may end up waking
                 * up in the meantime, but transferring an
                 * extra bucket in a constrained situation
                 * is not so bad, as we're likely to need
                 * it soon anyway.
                 */
                if (gen_list->mb_cont.mc_starved > 0) {
                    cnt_lst->mb_cont.mc_starved--;
                    cv_signal(&(gen_list->mgl_mstarved));
                } else
                    cnt_lst->mb_cont.mc_starved = 0;
            }
            MB_UNLOCK_CONT(gen_list);
            if ((persist & MBP_PERSIST) == 0)
                MB_UNLOCK_CONT(cnt_lst);
            else
                *pers_list = owner;
            break;
        }

        if (bucket->mb_owner & MB_BUCKET_FREE) {
            SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead),
                bucket, mb_blist);
            bucket->mb_owner = cnt_lst->mb_cont.mc_numowner;
        }

        if ((persist & MBP_PERSIST) == 0)
            MB_UNLOCK_CONT(cnt_lst);
        else
            *pers_list = owner;
        break;
    }
}

/*
 * Drain protocols in hopes to free up some resources.
 *
 * LOCKING NOTES:
 * No locks should be held when this is called.  The drain routines have to
 * presently acquire some locks which raises the possibility of lock order
 * violation if we're holding any mutex if that mutex is acquired in reverse
 * order relative to one of the locks in the drain routines.
 */
static void
mb_reclaim(void)
{
//    struct domain *dp;
//    struct protosw *pr;
    DebugBreak();
//  WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL,
//      "mb_reclaim()");

    mbstat.m_drain++;   /* XXX: No consistency. */

//    for (dp = domains; dp != NULL; dp = dp->dom_next)
//        for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
//            if (pr->pr_drain != NULL)
//                (*pr->pr_drain)();
}

/******************************************************************************
 * Internal setup macros.
 */

#define _mb_setup(m, type) do {                     \
    (m)->m_type = (type);                       \
    (m)->m_next = NULL;                     \
    (m)->m_nextpkt = NULL;                      \
    (m)->m_data = (m)->m_dat;                   \
    (m)->m_flags = 0;                       \
} while (0)

#define _mbhdr_setup(m, type) do {                  \
    (m)->m_type = (type);                       \
    (m)->m_next = NULL;                     \
    (m)->m_nextpkt = NULL;                      \
    (m)->m_data = (m)->m_pktdat;                    \
    (m)->m_flags = M_PKTHDR;                    \
    (m)->m_pkthdr.rcvif = NULL;                 \
    (m)->m_pkthdr.csum_flags = 0;                   \
    SLIST_INIT(&(m)->m_pkthdr.tags);                \
} while (0)

#define _mcl_setup(m) do {                      \
    (m)->m_data = (m)->m_ext.ext_buf;               \
    (m)->m_flags |= M_EXT;                      \
    (m)->m_ext.ext_free = NULL;                 \
    (m)->m_ext.ext_args = NULL;                 \
    (m)->m_ext.ext_size = MCLBYTES;                 \
    (m)->m_ext.ext_type = EXT_CLUSTER;              \
} while (0)

#define _mext_init_ref(m, ref) do {                 \
    (m)->m_ext.ref_cnt = ((ref) == NULL) ?              \
        (u_int *)DevMalloc(sizeof(u_int), M_MBUF, M_NOWAIT) :      \
        (u_int *)(ref);                     \
    if ((m)->m_ext.ref_cnt != NULL) {               \
        *((m)->m_ext.ref_cnt) = 0;              \
        MEXT_ADD_REF((m));                  \
    }                               \
} while (0)

#define cl2ref(cl)                          \
    (((uintptr_t)(cl) - (uintptr_t)mb_list_clust.ml_mapbase) >> MCLSHIFT)
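
/*
 * For example (assuming MCLBYTES = 2048, i.e. MCLSHIFT = 11): the cluster
 * starting 4096 bytes past ml_mapbase is the third cluster in the map,
 * and cl2ref() yields 4096 >> 11 = 2, its slot in cl_refcntmap.
 */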

#define _mext_dealloc_ref(m)                        \
    if ((m)->m_ext.ext_type != EXT_EXTREF)              \
        FREE((m)->m_ext.ref_cnt, M_MBUF)

/******************************************************************************
 * Internal routines.
 *
 * Because mb_alloc() and mb_free() are inlines (to keep the common
 * cases down to a maximum of one function call), below are a few
 * routines used only internally for the sole purpose of making certain
 * functions smaller.
 *
 * - _mext_free(): frees associated storage when the ref. count is
 *   exactly one and we're freeing.
 *
 * - _mgetm_internal(): common "persistent-lock" routine that allocates
 *   an mbuf and a cluster in one shot, but where the lock is already
 *   held coming in (which is what makes it different from the exported
 *   m_getcl()).  The lock is dropped when done.  This is used by m_getm()
 *   and, therefore, is very m_getm()-specific.
 */
static struct mbuf *_mgetm_internal(int, short, short, int);

void
_mext_free(struct mbuf *mb)
{

    if (mb->m_ext.ext_type == EXT_CLUSTER) {
        mb_free(&mb_list_clust, (caddr_t)mb->m_ext.ext_buf, MT_NOTMBUF,
            0, NULL);
    } else {
        (*(mb->m_ext.ext_free))(mb->m_ext.ext_buf, mb->m_ext.ext_args);
        _mext_dealloc_ref(mb);
    }
}

static struct mbuf *
_mgetm_internal(int how, short type, short persist, int cchnum)
{
    struct mbuf *mb;

    mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, persist,&cchnum);
    if (mb == NULL)
        return NULL;
    _mb_setup(mb, type);

    if ((persist & MBP_PERSIST) != 0) {
        mb->m_ext.ext_buf = (caddr_t)mb_alloc(&mb_list_clust,
            how, MT_NOTMBUF, MBP_PERSISTENT, &cchnum);
        if (mb->m_ext.ext_buf == NULL) {
            (void)m_free(mb);
            return NULL;
        }
        _mcl_setup(mb);
        _mext_init_ref(mb, &cl_refcntmap[cl2ref(mb->m_ext.ext_buf)]);
    }
    return (mb);
}

/******************************************************************************
 * Exported buffer allocation and de-allocation routines.
 */

/*
 * Allocate and return a single (normal) mbuf.  NULL is returned on failure.
 *
 * Arguments:
 *  - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks
 *    if really starved for memory.  M_DONTWAIT to never block.
 *  - type: the type of the mbuf being allocated.
 */
struct mbuf *
m_get(int how, short type)
{
    struct mbuf *mb;

    mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, 0, NULL);
    if (mb != NULL)
        _mb_setup(mb, type);
    return (mb);
}
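
/*
 * Typical call pattern (an illustrative sketch only; MT_DATA is the usual
 * mbuf type for payload data):
 *
 *   struct mbuf *m = m_get(M_DONTWAIT, MT_DATA);
 *   if (m != NULL) {
 *       m->m_len = 0;
 *       ... fill mtod(m, caddr_t) ...
 *       (void)m_free(m);
 *   }
 */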

/*
 * Allocate a given length worth of mbufs and/or clusters (whatever fits
 * best) and return a pointer to the top of the allocated chain.  If an
 * existing mbuf chain is provided, then we will append the new chain
 * to the existing one but still return the top of the newly allocated
 * chain.  NULL is returned on failure, in which case the [optional]
 * provided chain is left untouched, and any memory already allocated
 * is freed.
 *
 * Arguments:
 *  - m: existing chain to which to append new chain (optional).
 *  - len: total length of data to append, either in mbufs or clusters
 *    (we allocate whatever combination yields the best fit).
 *  - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks
 *    if really starved for memory.  M_DONTWAIT to never block.
 *  - type: the type of the mbuf being allocated.
 */
struct mbuf *
m_getm(struct mbuf *m, int len, int how, short type)
{
    struct mbuf *mb, *top, *cur, *mtail;
    int num, rem, cchnum;
    short persist;
    int i;

    KASSERT(len >= 0, ("m_getm(): len is < 0"));

    /* If m != NULL, we will append to the end of that chain. */
    if (m != NULL)
        for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next);
    else
        mtail = NULL;

    /*
     * In the best-case scenario (which should be the common case
     * unless we're in a starvation situation), we will be able to
     * go through the allocation of all the desired mbufs and clusters
     * here without dropping our per-CPU cache lock in between.
     */
    num = len / MCLBYTES;
    rem = len % MCLBYTES;
    persist = 0;
    cchnum = -1;
    top = cur = NULL;
    for (i = 0; i < num; i++) {
        mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type,
            MBP_PERSIST | persist, &cchnum);
        if (mb == NULL)
            goto failed;
        _mb_setup(mb, type);
        mb->m_len = 0;

        persist = (i != (num - 1) || rem > 0) ? MBP_PERSIST : 0;
        mb->m_ext.ext_buf = (caddr_t)mb_alloc(&mb_list_clust,
            how, MT_NOTMBUF, persist | MBP_PERSISTENT, &cchnum);
        if (mb->m_ext.ext_buf == NULL) {
            (void)m_free(mb);
            goto failed;
        }
        _mcl_setup(mb);
        _mext_init_ref(mb, &cl_refcntmap[cl2ref(mb->m_ext.ext_buf)]);
        persist = MBP_PERSISTENT;

        if (cur == NULL)
            top = cur = mb;
        else
            cur = (cur->m_next = mb);
    }
    if (rem > 0) {
        if (cchnum >= 0) {
            persist = MBP_PERSISTENT;
            persist |= (rem > MINCLSIZE) ? MBP_PERSIST : 0;
            mb = _mgetm_internal(how, type, persist, cchnum);
            if (mb == NULL)
                goto failed;
        } else if (rem > MINCLSIZE) {
            mb = m_getcl(how, type, 0);
        } else {
            mb = m_get(how, type);
        }
        if (mb != NULL) {
            mb->m_len = 0;
            if (cur == NULL)
                top = mb;
            else
                cur->m_next = mb;
        } else
            goto failed;
    }

    if (mtail != NULL)
        mtail->m_next = top;
    return top;
failed:
    if (top != NULL)
        m_freem(top);
    return NULL;
}
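
/*
 * Illustrative use of m_getm() (a sketch, not taken from a caller in this
 * tree): request room for 4000 bytes and append it to an existing header
 * mbuf.  On failure the provided chain is untouched, so the caller frees it.
 *
 *   struct mbuf *hdr = m_gethdr(M_DONTWAIT, MT_DATA);
 *   if (hdr != NULL && m_getm(hdr, 4000, M_DONTWAIT, MT_DATA) == NULL) {
 *       m_freem(hdr);
 *       hdr = NULL;
 *   }
 */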

/*
 * Allocate and return a single M_PKTHDR mbuf.  NULL is returned on failure.
 *
 * Arguments:
 *  - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks
 *    if really starved for memory.  M_DONTWAIT to never block.
 *  - type: the type of the mbuf being allocated.
 */
struct mbuf *
m_gethdr(int how, short type)
{
    struct mbuf *mb;

//    mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, 0, NULL);
    mb = (struct mbuf *)Malloc((MSIZE >=
            sizeof(struct mbuf)) ? MSIZE : sizeof(struct mbuf));
    if (mb != NULL) {
        _mbhdr_setup(mb, type);
#ifdef MAC
        if (mac_init_mbuf(mb, MBTOM(how)) != 0) {
            m_free(mb);
            return (NULL);
        }
#endif
    }
    return (mb);
}

/*
 * Allocate and return a single (normal) pre-zero'd mbuf.  NULL is
 * returned on failure.
 *
 * Arguments:
 *  - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks
 *    if really starved for memory.  M_DONTWAIT to never block.
 *  - type: the type of the mbuf being allocated.
 */
struct mbuf *
m_get_clrd(int how, short type)
{
    struct mbuf *mb;

    mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, 0, NULL);
    if (mb != NULL) {
        _mb_setup(mb, type);
        bzero(mtod(mb, caddr_t), MLEN);
    }
    return (mb);
}

/*
 * Allocate and return a single M_PKTHDR pre-zero'd mbuf.  NULL is
 * returned on failure.
 *
 * Arguments:
 *  - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks
 *    if really starved for memory.  M_DONTWAIT to never block.
 *  - type: the type of the mbuf being allocated.
 */
struct mbuf *
m_gethdr_clrd(int how, short type)
{
    struct mbuf *mb;

    mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, 0, NULL);
    if (mb != NULL) {
        _mbhdr_setup(mb, type);
#ifdef MAC
        if (mac_init_mbuf(mb, MBTOM(how)) != 0) {
            m_free(mb);
            return (NULL);
        }
#endif
        bzero(mtod(mb, caddr_t), MHLEN);
    }
    return (mb);
}

/*
 * Free a single mbuf and any associated storage that it may have attached
 * to it.  The associated storage may not be immediately freed if its
 * reference count is above 1.  Returns the next mbuf in the chain following
 * the mbuf being freed.
 *
 * Arguments:
 *  - mb: the mbuf to free.
 */
struct mbuf *
m_free(struct mbuf *mb)
{
    struct mbuf *nb;
    int cchnum;
    short persist = 0;

#ifdef INVARIANTS
    if (mb->m_flags & M_FREELIST)
        kprintf("m_free detected a mbuf double-free");
    mb->m_flags |= M_FREELIST;
#endif
    if ((mb->m_flags & M_PKTHDR) != 0)
        m_tag_delete_chain(mb, NULL);
    nb = mb->m_next;
    if ((mb->m_flags & M_EXT) != 0) {
        MEXT_REM_REF(mb);
        if (atomic_cmp_set_int(mb->m_ext.ref_cnt, 0, 1)) {
            if (mb->m_ext.ext_type == EXT_CLUSTER) {
                mb_free(&mb_list_clust,
                    (caddr_t)mb->m_ext.ext_buf, MT_NOTMBUF,
                    MBP_PERSIST, &cchnum);
                persist = MBP_PERSISTENT;
            } else {
                (*(mb->m_ext.ext_free))(mb->m_ext.ext_buf,
                    mb->m_ext.ext_args);
                _mext_dealloc_ref(mb);
                persist = 0;
            }
        }
    }
    mb_free(&mb_list_mbuf, mb, mb->m_type, persist, &cchnum);
    return (nb);
}

/*
 * Free an entire chain of mbufs and associated external buffers, if
 * applicable.  Right now, we only optimize a little so that the cache
 * lock may be held across a single mbuf+cluster free.  Hopefully,
 * we'll eventually be holding the lock across more than merely two
 * consecutive frees but right now this is hard to implement because of
 * things like _mext_dealloc_ref (may do a FREE()) and atomic ops in the
 * loop.
 *
 *  - mb: the mbuf chain to free.
 */
void
m_freem(struct mbuf *mb)
{
//    while (mb != NULL)
//        mb = m_free(mb);
    struct mbuf *nb = NULL;
    while (mb != NULL) {
        if ((mb->m_flags & M_EXT) != 0 ) {
            if (mb->m_ext.ext_buf) {
                FSFREE(mb->m_ext.ext_buf, 0);
            }
        }
        nb = mb->m_next;
        FSFREE(mb, 0);
        mb = nb;
    }
}

/*
 * Fetch an mbuf with a cluster attached to it.  If one of the
 * allocations fails, the entire allocation fails.  This routine is
 * the preferred way of fetching both the mbuf and cluster together,
 * as it avoids having to unlock/relock between allocations.  Returns
 * NULL on failure.
 *
 * Arguments:
 *  - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks
 *    if really starved for memory.  M_DONTWAIT to never block.
 *  - type: the type of the mbuf being allocated.
 *  - flags: any flags to pass to the mbuf being allocated; if this includes
 *    the M_PKTHDR bit, then the mbuf is configured as a M_PKTHDR mbuf.
 */
struct mbuf *
m_getcl(int how, short type, int flags)
{
    struct mbuf *mb;
    int cchnum;

    mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type,
        MBP_PERSIST, &cchnum);
    if (mb == NULL)
        return NULL;
    mb->m_type = type;
    mb->m_next = NULL;
    mb->m_flags = flags;
    if ((flags & M_PKTHDR) != 0) {
        mb->m_nextpkt = NULL;
        mb->m_pkthdr.rcvif = NULL;
        mb->m_pkthdr.csum_flags = 0;
        SLIST_INIT(&mb->m_pkthdr.tags);
    }

    mb->m_ext.ext_buf = (caddr_t)mb_alloc(&mb_list_clust, how,
        MT_NOTMBUF, MBP_PERSISTENT, &cchnum);
    if (mb->m_ext.ext_buf == NULL) {
        (void)m_free(mb);
        mb = NULL;
    } else {
        _mcl_setup(mb);
        _mext_init_ref(mb, &cl_refcntmap[cl2ref(mb->m_ext.ext_buf)]);
    }
#ifdef MAC
    if (flags & M_PKTHDR) {
        if (mac_init_mbuf(mb, MBTOM(how)) != 0) {
            m_free(mb);
            return (NULL);
        }
    }
#endif
    return (mb);
}

/*
 * Fetch a single mbuf cluster and attach it to an existing mbuf.  If
 * successful, configures the provided mbuf to have mbuf->m_ext.ext_buf
 * pointing to the cluster, and sets the M_EXT bit in the mbuf's flags.
 * The M_EXT bit is not set on failure.
 *
 * Arguments:
 *  - mb: the existing mbuf to which to attach the allocated cluster.
 *  - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks
 *    if really starved for memory.  M_DONTWAIT to never block.
 */
void
m_clget(struct mbuf *mb, int how)
{

//    mb->m_ext.ext_buf= (caddr_t)mb_alloc(&mb_list_clust,how,MT_NOTMBUF,
//        0, NULL);
    mb->m_ext.ext_buf = (caddr_t)Malloc(MCLBYTES);
    if (mb->m_ext.ext_buf != NULL) {
        _mcl_setup(mb);
//        _mext_init_ref(mb, &cl_refcntmap[cl2ref(mb->m_ext.ext_buf)]);
        _mext_init_ref(mb, NULL);
    }
}

/*
 * Configure a provided mbuf to refer to the provided external storage
 * buffer and setup a reference count for said buffer.  If the setting
 * up of the reference count fails, the M_EXT bit will not be set.  If
 * successful, the M_EXT bit is set in the mbuf's flags.
 *
 * Arguments:
 *  - mb: the existing mbuf to which to attach the provided buffer.
 *  - buf: the address of the provided external storage buffer.
 *  - size: the size of the provided buffer.
 *  - freef: a pointer to a routine that is responsible for freeing the
 *    provided external storage buffer.
 *  - args: a pointer to an argument structure (of any type) to be passed
 *    to the provided freef routine (may be NULL).
 *  - flags: any other flags to be passed to the provided mbuf.
 *  - type: the type that the external storage buffer should be labeled with.
 */
void
m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
    void (*freef)(void *, void *), void *args, int flags, int type)
{
    u_int *ref_cnt = NULL;

    if (type == EXT_CLUSTER)
        ref_cnt = &cl_refcntmap[cl2ref(mb->m_ext.ext_buf)];
    else if (type == EXT_EXTREF)
        ref_cnt = mb->m_ext.ref_cnt;
    _mext_init_ref(mb, ref_cnt);
    if (mb->m_ext.ref_cnt != NULL) {
        mb->m_flags |= (M_EXT | flags);
        mb->m_ext.ext_buf = buf;
        mb->m_data = mb->m_ext.ext_buf;
        mb->m_ext.ext_size = size;
        mb->m_ext.ext_free = freef;
        mb->m_ext.ext_args = args;
        mb->m_ext.ext_type = type;
    }
}
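
/*
 * A hedged usage sketch (my_buf, my_refcnt, my_free_rtn and my_arg are
 * hypothetical caller-side names): with EXT_EXTREF the caller supplies its
 * own reference counter, which _mext_init_ref() picks up from
 * mb->m_ext.ref_cnt, so that field must point at the counter beforehand.
 *
 *   mb->m_ext.ref_cnt = &my_refcnt;
 *   m_extadd(mb, my_buf, sizeof(my_buf), my_free_rtn, my_arg, 0, EXT_EXTREF);
 *   if ((mb->m_flags & M_EXT) == 0)
 *       ... setup failed, storage was not attached ...
 */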

/*
 * Change type of provided mbuf.  This is a relatively expensive operation
 * (due to the cost of statistics manipulations) and should be avoided, where
 * possible.
 *
 * Arguments:
 *  - mb: the provided mbuf for which the type needs to be changed.
 *  - new_type: the new type to change the mbuf to.
 */
void
m_chtype(struct mbuf *mb, short new_type)
{
    struct mb_gen_list *gen_list;

    gen_list = MB_GET_GEN_LIST(&mb_list_mbuf);
    MB_LOCK_CONT(gen_list);
    MB_MBTYPES_DEC(gen_list, mb->m_type, 1);
    MB_MBTYPES_INC(gen_list, new_type, 1);
    MB_UNLOCK_CONT(gen_list);
    mb->m_type = new_type;
}



EXTERN_C void* Kmem_suballoc(vm_offset_t *base, vm_offset_t *top, vm_size_t size)
{
    void *pAddress = DevMalloc(size, 0, 0);
    if (pAddress == NULL) {
        (*base) = 0;
        (*top) = 0;
        return NULL;
    }

    (*base) = (vm_offset_t)pAddress;
    (*top) =  (vm_offset_t)pAddress + size;
    return pAddress;
}
